set.seed(1)
project_2_data =
read_csv("project_2_data.csv", na = c("NA", "", ".")) |>
janitor::clean_names() |>
mutate(status = ifelse(status == "Dead", 1, 0))
## Rows: 4024 Columns: 16
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (11): Race, Marital Status, T Stage, N Stage, 6th Stage, differentiate, ...
## dbl (5): Age, Tumor Size, Regional Node Examined, Reginol Node Positive, Su...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
dead <- project_2_data |>
filter(status == 1)
alive <- project_2_data |>
filter(status == 0) |>
sample_n(nrow(dead))
project_2_numdata<-
bind_rows(dead, alive) |>
mutate(
t_stage = case_when(
t_stage == "T1" ~ 1,
t_stage == "T2" ~ 2,
t_stage == "T3" ~ 3,
t_stage == "T4" ~ 4,
TRUE ~ NA_real_),
n_stage = case_when(
n_stage == "N1" ~ 1,
n_stage == "N2" ~ 2,
n_stage == "N3" ~ 3,
TRUE ~ NA_real_),
differentiate = case_when(
differentiate == "Well differentiated" ~ 1,
differentiate == "Moderately differentiated" ~ 2,
differentiate == "Poorly differentiated" ~ 3,
differentiate == "Undifferentiated" ~ 4,
TRUE ~ NA_real_),
grade = case_when(
grade == "anaplastic; Grade IV" ~ 4,
grade == "3" ~ 3,
grade == "2" ~ 2,
grade == "1" ~ 1,
TRUE ~ NA_real_),
a_stage_regional = ifelse(a_stage == "Regional", 1, 0),
estrogen_status = ifelse(estrogen_status == "Positive", 1, 0),
progesterone_status = ifelse(progesterone_status == "Positive", 1, 0)
) |>
select(-a_stage)
variables <- names(project_2_numdata)[!names(project_2_numdata) %in% c("survival_months", "status")]
for (var in variables) {
formula <- as.formula(paste("Surv(survival_months, status) ~", var))
model <- coxph(formula, data = project_2_numdata)
print(var)
print(summary(model))
}
## [1] "age"
## Call:
## coxph(formula = formula, data = project_2_numdata)
##
## n= 1232, number of events= 616
##
## coef exp(coef) se(coef) z Pr(>|z|)
## age 0.013313 1.013402 0.004454 2.989 0.0028 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## age 1.013 0.9868 1.005 1.022
##
## Concordance= 0.536 (se = 0.012 )
## Likelihood ratio test= 9.05 on 1 df, p=0.003
## Wald test = 8.93 on 1 df, p=0.003
## Score (logrank) test = 8.95 on 1 df, p=0.003
##
## [1] "race"
## Call:
## coxph(formula = formula, data = project_2_numdata)
##
## n= 1232, number of events= 616
##
## coef exp(coef) se(coef) z Pr(>|z|)
## raceOther -0.5965 0.5508 0.2099 -2.842 0.00448 **
## raceWhite -0.3263 0.7216 0.1252 -2.607 0.00914 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## raceOther 0.5508 1.816 0.3650 0.8310
## raceWhite 0.7216 1.386 0.5646 0.9222
##
## Concordance= 0.527 (se = 0.008 )
## Likelihood ratio test= 9.46 on 2 df, p=0.009
## Wald test = 9.81 on 2 df, p=0.007
## Score (logrank) test = 9.92 on 2 df, p=0.007
##
## [1] "marital_status"
## Call:
## coxph(formula = formula, data = project_2_numdata)
##
## n= 1232, number of events= 616
##
## coef exp(coef) se(coef) z Pr(>|z|)
## marital_statusMarried -0.23876 0.78761 0.11794 -2.024 0.0429 *
## marital_statusSeparated 0.53873 1.71383 0.27908 1.930 0.0536 .
## marital_statusSingle -0.10841 0.89726 0.14404 -0.753 0.4517
## marital_statusWidowed 0.04877 1.04998 0.17757 0.275 0.7836
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## marital_statusMarried 0.7876 1.2697 0.6251 0.9924
## marital_statusSeparated 1.7138 0.5835 0.9918 2.9616
## marital_statusSingle 0.8973 1.1145 0.6766 1.1899
## marital_statusWidowed 1.0500 0.9524 0.7414 1.4871
##
## Concordance= 0.537 (se = 0.011 )
## Likelihood ratio test= 12.55 on 4 df, p=0.01
## Wald test = 14 on 4 df, p=0.007
## Score (logrank) test = 14.36 on 4 df, p=0.006
##
## [1] "t_stage"
## Call:
## coxph(formula = formula, data = project_2_numdata)
##
## n= 1232, number of events= 616
##
## coef exp(coef) se(coef) z Pr(>|z|)
## t_stage 0.38440 1.46874 0.04713 8.156 3.48e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## t_stage 1.469 0.6809 1.339 1.611
##
## Concordance= 0.582 (se = 0.011 )
## Likelihood ratio test= 62.94 on 1 df, p=2e-15
## Wald test = 66.51 on 1 df, p=3e-16
## Score (logrank) test = 67.36 on 1 df, p=2e-16
##
## [1] "n_stage"
## Call:
## coxph(formula = formula, data = project_2_numdata)
##
## n= 1232, number of events= 616
##
## coef exp(coef) se(coef) z Pr(>|z|)
## n_stage 0.61870 1.85652 0.04835 12.8 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## n_stage 1.857 0.5386 1.689 2.041
##
## Concordance= 0.623 (se = 0.011 )
## Likelihood ratio test= 152 on 1 df, p=<2e-16
## Wald test = 163.7 on 1 df, p=<2e-16
## Score (logrank) test = 175.6 on 1 df, p=<2e-16
##
## [1] "x6th_stage"
## Call:
## coxph(formula = formula, data = project_2_numdata)
##
## n= 1232, number of events= 616
##
## coef exp(coef) se(coef) z Pr(>|z|)
## x6th_stageIIB 0.4390 1.5512 0.1336 3.287 0.00101 **
## x6th_stageIIIA 0.7669 2.1530 0.1260 6.088 1.15e-09 ***
## x6th_stageIIIB 1.4792 4.3893 0.2464 6.003 1.94e-09 ***
## x6th_stageIIIC 1.5199 4.5718 0.1274 11.934 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## x6th_stageIIB 1.551 0.6447 1.194 2.015
## x6th_stageIIIA 2.153 0.4645 1.682 2.756
## x6th_stageIIIB 4.389 0.2278 2.708 7.115
## x6th_stageIIIC 4.572 0.2187 3.562 5.868
##
## Concordance= 0.637 (se = 0.012 )
## Likelihood ratio test= 171.5 on 4 df, p=<2e-16
## Wald test = 177.7 on 4 df, p=<2e-16
## Score (logrank) test = 198.8 on 4 df, p=<2e-16
##
## [1] "differentiate"
## Call:
## coxph(formula = formula, data = project_2_numdata)
##
## n= 1232, number of events= 616
##
## coef exp(coef) se(coef) z Pr(>|z|)
## differentiate 0.44124 1.55463 0.06433 6.859 6.94e-12 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## differentiate 1.555 0.6432 1.37 1.764
##
## Concordance= 0.574 (se = 0.011 )
## Likelihood ratio test= 48.05 on 1 df, p=4e-12
## Wald test = 47.04 on 1 df, p=7e-12
## Score (logrank) test = 47.37 on 1 df, p=6e-12
##
## [1] "grade"
## Call:
## coxph(formula = formula, data = project_2_numdata)
##
## n= 1232, number of events= 616
##
## coef exp(coef) se(coef) z Pr(>|z|)
## grade 0.44124 1.55463 0.06433 6.859 6.94e-12 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## grade 1.555 0.6432 1.37 1.764
##
## Concordance= 0.574 (se = 0.011 )
## Likelihood ratio test= 48.05 on 1 df, p=4e-12
## Wald test = 47.04 on 1 df, p=7e-12
## Score (logrank) test = 47.37 on 1 df, p=6e-12
##
## [1] "tumor_size"
## Call:
## coxph(formula = formula, data = project_2_numdata)
##
## n= 1232, number of events= 616
##
## coef exp(coef) se(coef) z Pr(>|z|)
## tumor_size 0.011933 1.012005 0.001574 7.58 3.46e-14 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## tumor_size 1.012 0.9881 1.009 1.015
##
## Concordance= 0.592 (se = 0.012 )
## Likelihood ratio test= 50.4 on 1 df, p=1e-12
## Wald test = 57.45 on 1 df, p=3e-14
## Score (logrank) test = 58.25 on 1 df, p=2e-14
##
## [1] "estrogen_status"
## Call:
## coxph(formula = formula, data = project_2_numdata)
##
## n= 1232, number of events= 616
##
## coef exp(coef) se(coef) z Pr(>|z|)
## estrogen_status -0.9108 0.4022 0.1064 -8.562 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## estrogen_status 0.4022 2.486 0.3265 0.4954
##
## Concordance= 0.565 (se = 0.008 )
## Likelihood ratio test= 60.02 on 1 df, p=9e-15
## Wald test = 73.31 on 1 df, p=<2e-16
## Score (logrank) test = 78.48 on 1 df, p=<2e-16
##
## [1] "progesterone_status"
## Call:
## coxph(formula = formula, data = project_2_numdata)
##
## n= 1232, number of events= 616
##
## coef exp(coef) se(coef) z Pr(>|z|)
## progesterone_status -0.71314 0.49010 0.08583 -8.308 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## progesterone_status 0.4901 2.04 0.4142 0.5799
##
## Concordance= 0.588 (se = 0.01 )
## Likelihood ratio test= 62.82 on 1 df, p=2e-15
## Wald test = 69.03 on 1 df, p=<2e-16
## Score (logrank) test = 71.97 on 1 df, p=<2e-16
##
## [1] "regional_node_examined"
## Call:
## coxph(formula = formula, data = project_2_numdata)
##
## n= 1232, number of events= 616
##
## coef exp(coef) se(coef) z Pr(>|z|)
## regional_node_examined 0.010506 1.010561 0.004982 2.109 0.035 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## regional_node_examined 1.011 0.9895 1.001 1.02
##
## Concordance= 0.521 (se = 0.013 )
## Likelihood ratio test= 4.35 on 1 df, p=0.04
## Wald test = 4.45 on 1 df, p=0.03
## Score (logrank) test = 4.44 on 1 df, p=0.04
##
## [1] "reginol_node_positive"
## Call:
## coxph(formula = formula, data = project_2_numdata)
##
## n= 1232, number of events= 616
##
## coef exp(coef) se(coef) z Pr(>|z|)
## reginol_node_positive 0.054926 1.056462 0.004627 11.87 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## reginol_node_positive 1.056 0.9466 1.047 1.066
##
## Concordance= 0.627 (se = 0.012 )
## Likelihood ratio test= 106.3 on 1 df, p=<2e-16
## Wald test = 140.9 on 1 df, p=<2e-16
## Score (logrank) test = 148 on 1 df, p=<2e-16
##
## [1] "a_stage_regional"
## Call:
## coxph(formula = formula, data = project_2_numdata)
##
## n= 1232, number of events= 616
##
## coef exp(coef) se(coef) z Pr(>|z|)
## a_stage_regional -1.1046 0.3314 0.1747 -6.324 2.55e-10 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## a_stage_regional 0.3314 3.018 0.2353 0.4666
##
## Concordance= 0.522 (se = 0.005 )
## Likelihood ratio test= 29.49 on 1 df, p=6e-08
## Wald test = 39.99 on 1 df, p=3e-10
## Score (logrank) test = 44.22 on 1 df, p=3e-11
All variables are statistically significant except for marriage in “marital_status”
project_2_numdata <-
project_2_numdata|>
mutate(Married = ifelse(marital_status == "Married", 1, 0)) |>
select(-differentiate, - marital_status)
cor_matrix <- cor(project_2_numdata[, sapply(project_2_numdata, is.numeric)])
print(cor_matrix)
## age t_stage n_stage grade
## age 1.00000000 -0.050113404 -0.01706295 -0.12321448
## t_stage -0.05011340 1.000000000 0.30612098 0.15076838
## n_stage -0.01706295 0.306120984 1.00000000 0.17991601
## grade -0.12321448 0.150768380 0.17991601 1.00000000
## tumor_size -0.07953275 0.784997546 0.30277440 0.14403058
## estrogen_status 0.12111658 -0.106722673 -0.12700846 -0.26247316
## progesterone_status 0.01233580 -0.071974613 -0.11108594 -0.21201525
## regional_node_examined -0.03141638 0.088442919 0.36808270 0.07651544
## reginol_node_positive 0.01628318 0.244266968 0.82874594 0.15163577
## survival_months -0.08599693 -0.159716020 -0.24737291 -0.15052637
## status 0.06976636 0.225250913 0.34556514 0.19151006
## a_stage_regional 0.04777171 -0.234675561 -0.25575034 -0.04874199
## Married -0.04796418 0.003435462 -0.05151826 -0.02717180
## tumor_size estrogen_status progesterone_status
## age -0.07953275 0.121116575 0.01233580
## t_stage 0.78499755 -0.106722673 -0.07197461
## n_stage 0.30277440 -0.127008462 -0.11108594
## grade 0.14403058 -0.262473159 -0.21201525
## tumor_size 1.00000000 -0.123020668 -0.08438705
## estrogen_status -0.12302067 1.000000000 0.58504596
## progesterone_status -0.08438705 0.585045962 1.00000000
## regional_node_examined 0.11473470 -0.041404301 -0.01390137
## reginol_node_positive 0.24878553 -0.100873585 -0.07105526
## survival_months -0.16151178 0.227274035 0.21200308
## status 0.19535827 -0.175795073 -0.20001424
## a_stage_regional -0.12796649 0.127990099 0.07311096
## Married -0.01036450 0.008579762 -0.01351800
## regional_node_examined reginol_node_positive
## age -0.03141638 0.01628318
## t_stage 0.08844292 0.24426697
## n_stage 0.36808270 0.82874594
## grade 0.07651544 0.15163577
## tumor_size 0.11473470 0.24878553
## estrogen_status -0.04140430 -0.10087358
## progesterone_status -0.01390137 -0.07105526
## regional_node_examined 1.00000000 0.49395643
## reginol_node_positive 0.49395643 1.00000000
## survival_months -0.03087185 -0.21263747
## status 0.06199862 0.31134369
## a_stage_regional -0.04237809 -0.19505294
## Married 0.02581466 -0.03581806
## survival_months status a_stage_regional
## age -0.08599693 0.06976636 0.04777171
## t_stage -0.15971602 0.22525091 -0.23467556
## n_stage -0.24737291 0.34556514 -0.25575034
## grade -0.15052637 0.19151006 -0.04874199
## tumor_size -0.16151178 0.19535827 -0.12796649
## estrogen_status 0.22727403 -0.17579507 0.12799010
## progesterone_status 0.21200308 -0.20001424 0.07311096
## regional_node_examined -0.03087185 0.06199862 -0.04237809
## reginol_node_positive -0.21263747 0.31134369 -0.19505294
## survival_months 1.00000000 -0.58302039 0.13738966
## status -0.58302039 1.00000000 -0.13123516
## a_stage_regional 0.13738966 -0.13123516 1.00000000
## Married 0.06161119 -0.08197981 0.03226076
## Married
## age -0.047964181
## t_stage 0.003435462
## n_stage -0.051518259
## grade -0.027171798
## tumor_size -0.010364496
## estrogen_status 0.008579762
## progesterone_status -0.013518004
## regional_node_examined 0.025814656
## reginol_node_positive -0.035818059
## survival_months 0.061611186
## status -0.081979813
## a_stage_regional 0.032260758
## Married 1.000000000
cortable<-project_2_numdata|>
select(-race,-x6th_stage)
cortable$regional_node_examined
## [1] 9 9 12 15 16 23 24 8 5 21 4 3 33 7 11 3 16 8 28 20 14 23 20 13
## [25] 15 14 20 17 11 23 3 34 1 17 17 4 12 27 14 20 28 29 18 3 4 13 23 18
## [49] 7 41 11 10 27 19 5 24 2 12 16 15 40 9 10 19 17 3 19 16 7 25 1 9
## [73] 10 15 9 11 29 21 17 17 7 15 22 47 22 14 9 9 13 14 10 19 15 6 2 54
## [97] 5 31 28 21 19 8 13 13 9 18 12 15 27 24 5 7 22 17 16 12 9 23 10 6
## [121] 19 5 15 10 30 18 15 27 14 3 36 16 5 9 3 32 31 21 18 10 3 20 10 25
## [145] 9 14 9 29 32 19 27 1 11 13 14 21 11 15 16 22 2 14 17 5 10 23 10 3
## [169] 17 18 18 23 9 9 8 23 19 12 9 26 12 15 23 9 14 22 16 30 4 2 10 13
## [193] 1 10 4 15 7 5 28 12 18 28 18 12 6 18 26 3 21 36 26 18 18 28 16 30
## [217] 2 1 21 14 21 4 10 16 19 15 29 15 34 23 27 17 13 12 3 13 14 36 11 12
## [241] 9 9 5 13 11 20 10 14 16 47 15 16 29 16 9 7 11 6 16 18 22 18 4 25
## [265] 29 1 13 13 9 21 25 10 2 8 21 2 26 17 4 9 13 16 10 13 1 6 9 9
## [289] 18 22 8 15 8 4 4 18 12 17 15 19 16 27 15 10 14 13 12 11 8 14 21 13
## [313] 17 4 19 12 17 7 25 37 8 24 13 22 15 3 13 3 2 19 12 11 11 2 20 12
## [337] 19 11 15 12 21 10 10 17 3 15 17 15 17 11 8 18 21 18 10 29 9 18 13 20
## [361] 35 23 12 7 32 18 18 5 28 10 10 18 11 9 9 1 30 3 7 9 19 57 5 22
## [385] 12 15 12 3 8 15 25 10 4 26 25 16 23 13 17 8 24 9 10 9 36 19 12 8
## [409] 21 8 10 30 17 11 12 20 22 14 4 11 1 4 18 20 12 19 15 21 16 18 11 16
## [433] 17 5 16 23 7 5 19 9 22 22 8 10 19 11 11 7 12 8 13 12 24 12 16 16
## [457] 20 13 3 30 22 35 11 10 7 30 29 14 12 17 2 24 15 10 9 12 4 19 23 28
## [481] 17 14 27 12 16 11 19 22 16 8 9 10 20 15 4 24 19 18 12 4 9 17 22 2
## [505] 11 6 2 16 21 20 12 17 21 7 15 13 6 4 9 30 2 29 13 3 8 5 14 6
## [529] 15 1 14 11 21 34 11 18 13 7 11 11 4 16 19 20 12 13 30 19 17 12 12 15
## [553] 12 9 12 6 8 2 12 28 14 24 24 4 26 7 18 8 18 13 22 9 28 21 10 13
## [577] 6 16 12 2 15 16 17 17 23 25 47 21 19 18 18 3 31 11 2 16 14 16 23 13
## [601] 14 7 10 7 16 25 28 10 16 22 23 21 21 19 6 2 8 6 12 2 5 14 15 8
## [625] 3 23 19 19 15 7 7 14 10 10 14 31 17 25 7 12 15 12 4 17 19 24 14 24
## [649] 6 25 6 19 34 16 3 7 15 19 2 26 9 21 2 9 13 10 17 13 4 15 5 13
## [673] 21 5 18 6 21 11 18 11 7 4 5 5 13 18 17 5 7 14 23 11 15 13 13 10
## [697] 43 47 20 14 9 26 7 12 28 1 5 18 18 12 11 20 41 23 10 20 16 21 26 12
## [721] 6 17 12 11 20 22 26 10 8 15 11 4 16 13 21 14 17 9 10 8 3 12 22 37
## [745] 18 7 11 23 13 25 12 17 29 1 12 7 1 23 4 10 25 13 1 9 9 11 26 9
## [769] 18 3 8 24 9 14 24 18 2 15 24 9 23 27 16 15 18 16 17 25 15 15 18 15
## [793] 12 2 13 20 9 13 16 15 15 9 23 13 2 20 19 29 22 2 15 9 23 7 12 3
## [817] 13 1 31 15 20 12 18 10 15 7 23 24 13 8 12 13 20 7 22 4 11 14 16 17
## [841] 10 13 11 19 17 15 25 15 19 9 20 16 14 6 14 11 13 16 7 17 16 13 5 35
## [865] 1 20 20 5 5 26 15 15 16 20 13 14 7 15 27 22 30 4 34 1 20 14 18 10
## [889] 16 8 29 4 9 8 3 10 25 18 20 9 17 28 15 20 3 18 9 21 14 7 1 12
## [913] 15 6 17 35 13 25 18 8 16 26 5 8 9 25 27 13 3 16 29 6 14 3 17 16
## [937] 17 32 8 16 3 2 16 20 24 13 19 19 11 3 14 12 8 6 1 23 7 2 4 13
## [961] 12 6 37 15 17 2 14 11 3 12 27 6 19 4 19 12 14 18 15 12 20 10 21 8
## [985] 17 17 8 20 15 10 6 21 6 8 17 24 7 13 14 27 15 23 15 11 1 24 13 26
## [1009] 16 18 5 16 28 19 12 14 14 23 17 18 31 12 11 9 20 16 18 18 2 13 4 14
## [1033] 12 13 4 3 2 8 11 3 2 2 2 13 10 1 16 12 18 5 28 16 13 15 7 22
## [1057] 17 11 10 16 14 9 8 16 15 6 18 14 15 14 3 22 38 3 5 17 9 23 14 13
## [1081] 9 24 9 9 6 16 18 17 10 24 8 27 7 22 10 12 28 14 9 7 17 26 11 23
## [1105] 9 11 12 32 32 6 13 16 8 19 37 3 18 14 10 21 13 12 15 9 6 7 9 13
## [1129] 31 13 18 4 6 18 12 3 16 11 3 29 26 14 9 8 29 18 27 11 20 19 9 16
## [1153] 13 3 9 2 12 10 7 12 25 26 18 14 17 18 9 9 12 13 14 18 30 14 6 15
## [1177] 2 19 16 14 9 10 9 5 7 11 4 14 10 4 9 12 12 3 16 16 17 24 9 11
## [1201] 16 19 11 14 3 4 5 5 17 12 16 22 11 10 8 18 21 10 4 25 3 19 14 16
## [1225] 15 16 16 11 18 13 11 12
chart.Correlation(cortable, histogram=TRUE, pch=19)
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
We have to choice 1 between t_stage - tumor size n stage -
regional_node_possitive
muldata = project_2_numdata
cox_model <- coxph(Surv(survival_months, status) ~.,
data = muldata)
vifs <- vif(cox_model)
## Warning in vif.default(cox_model): No intercept: vifs may not be sensible.
print(vifs)
## GVIF Df GVIF^(1/(2*Df))
## age 1.078511 1 1.038514
## race 1.089620 2 1.021689
## t_stage 4.204881 1 2.050581
## n_stage 16.052408 1 4.006546
## x6th_stage 42.723885 4 1.598946
## grade 1.132422 1 1.064153
## tumor_size 2.891806 1 1.700531
## estrogen_status 1.705080 1 1.305787
## progesterone_status 1.579316 1 1.256708
## regional_node_examined 1.766667 1 1.329160
## reginol_node_positive 4.281220 1 2.069111
## a_stage_regional 1.337110 1 1.156335
## Married 1.094899 1 1.046374
remove t-stage,n_stage and regional_mode_positive due to higher VIF score
final = muldata |>
select(-t_stage, -n_stage,-reginol_node_positive)
cox_model1 <- coxph(Surv(survival_months, status) ~.,
data = final)
vifs <- vif(cox_model1)
## Warning in vif.default(cox_model1): No intercept: vifs may not be sensible.
print(vifs)
## GVIF Df GVIF^(1/(2*Df))
## age 1.065375 1 1.032170
## race 1.084396 2 1.020462
## x6th_stage 2.150931 4 1.100470
## grade 1.123660 1 1.060028
## tumor_size 1.298750 1 1.139627
## estrogen_status 1.647493 1 1.283547
## progesterone_status 1.541107 1 1.241413
## regional_node_examined 1.247171 1 1.116768
## a_stage_regional 1.327119 1 1.152007
## Married 1.093934 1 1.045913
ph_test <- cox.zph(cox_model1)
print(ph_test)
## chisq df p
## age 0.1978 1 0.66
## race 0.9493 2 0.62
## x6th_stage 7.3617 4 0.12
## grade 0.1293 1 0.72
## tumor_size 0.0858 1 0.77
## estrogen_status 23.1261 1 1.5e-06
## progesterone_status 19.6293 1 9.4e-06
## regional_node_examined 0.0880 1 0.77
## a_stage_regional 0.6691 1 0.41
## Married 2.3184 1 0.13
## GLOBAL 47.1764 14 1.8e-05
plot(ph_test)
The Cox model assumes that hazard ratios are constant over time. A non-significant p-value (p > 0.05) indicates that the PH assumption holds. As GLOBAL 47.176 15 1.8e-05, the model did not meet the assumption, as same as estrogen_status and progesterone_status.We need further improve our model.
cox_model2 <- coxph(Surv(survival_months, status) ~ age + race + x6th_stage + grade +
tumor_size + regional_node_examined + Married,
data = final,x = TRUE)
summary(cox_model2)
## Call:
## coxph(formula = Surv(survival_months, status) ~ age + race +
## x6th_stage + grade + tumor_size + regional_node_examined +
## Married, data = final, x = TRUE)
##
## n= 1232, number of events= 616
##
## coef exp(coef) se(coef) z Pr(>|z|)
## age 0.018010 1.018174 0.004577 3.935 8.32e-05 ***
## raceOther -0.448268 0.638733 0.213151 -2.103 0.03546 *
## raceWhite -0.316702 0.728548 0.128875 -2.457 0.01399 *
## x6th_stageIIB 0.410790 1.508008 0.137843 2.980 0.00288 **
## x6th_stageIIIA 0.720050 2.054536 0.139497 5.162 2.45e-07 ***
## x6th_stageIIIB 1.364084 3.912139 0.253810 5.374 7.68e-08 ***
## x6th_stageIIIC 1.496444 4.465782 0.152449 9.816 < 2e-16 ***
## grade 0.369090 1.446418 0.066157 5.579 2.42e-08 ***
## tumor_size 0.002424 1.002427 0.001884 1.287 0.19808
## regional_node_examined -0.014599 0.985508 0.005693 -2.564 0.01034 *
## Married -0.137910 0.871177 0.084599 -1.630 0.10307
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## age 1.0182 0.9822 1.0091 1.0273
## raceOther 0.6387 1.5656 0.4206 0.9700
## raceWhite 0.7285 1.3726 0.5659 0.9379
## x6th_stageIIB 1.5080 0.6631 1.1510 1.9758
## x6th_stageIIIA 2.0545 0.4867 1.5631 2.7006
## x6th_stageIIIB 3.9121 0.2556 2.3789 6.4337
## x6th_stageIIIC 4.4658 0.2239 3.3123 6.0209
## grade 1.4464 0.6914 1.2705 1.6467
## tumor_size 1.0024 0.9976 0.9987 1.0061
## regional_node_examined 0.9855 1.0147 0.9746 0.9966
## Married 0.8712 1.1479 0.7381 1.0283
##
## Concordance= 0.669 (se = 0.012 )
## Likelihood ratio test= 237.1 on 11 df, p=<2e-16
## Wald test = 244 on 11 df, p=<2e-16
## Score (logrank) test = 265.9 on 11 df, p=<2e-16
ph_test <- cox.zph(cox_model2)
print(ph_test)
## chisq df p
## age 0.33833 1 0.561
## race 0.84145 2 0.657
## x6th_stage 5.93099 4 0.204
## grade 0.30431 1 0.581
## tumor_size 0.00191 1 0.965
## regional_node_examined 0.01952 1 0.889
## Married 2.98963 1 0.084
## GLOBAL 10.89580 11 0.452
plot(ph_test)
now the assumption is been fully achieved
cox_model3 <- coxph(Surv(survival_months, status) ~ age + race + x6th_stage + grade +
log2(tumor_size+1) + log2(regional_node_examined+1) + Married,
data = final)
summary(cox_model3)
## Call:
## coxph(formula = Surv(survival_months, status) ~ age + race +
## x6th_stage + grade + log2(tumor_size + 1) + log2(regional_node_examined +
## 1) + Married, data = final)
##
## n= 1232, number of events= 616
##
## coef exp(coef) se(coef) z Pr(>|z|)
## age 0.017897 1.018058 0.004587 3.902 9.56e-05
## raceOther -0.469027 0.625611 0.212988 -2.202 0.027656
## raceWhite -0.325858 0.721907 0.128703 -2.532 0.011345
## x6th_stageIIB 0.366885 1.443232 0.146188 2.510 0.012084
## x6th_stageIIIA 0.701186 2.016143 0.149147 4.701 2.59e-06
## x6th_stageIIIB 1.323244 3.755587 0.257297 5.143 2.71e-07
## x6th_stageIIIC 1.486605 4.422059 0.159364 9.328 < 2e-16
## grade 0.364918 1.440396 0.066055 5.524 3.30e-08
## log2(tumor_size + 1) 0.091348 1.095650 0.052305 1.746 0.080735
## log2(regional_node_examined + 1) -0.168514 0.844919 0.051196 -3.292 0.000996
## Married -0.133509 0.875019 0.084670 -1.577 0.114837
##
## age ***
## raceOther *
## raceWhite *
## x6th_stageIIB *
## x6th_stageIIIA ***
## x6th_stageIIIB ***
## x6th_stageIIIC ***
## grade ***
## log2(tumor_size + 1) .
## log2(regional_node_examined + 1) ***
## Married
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## age 1.0181 0.9823 1.0089 1.0273
## raceOther 0.6256 1.5984 0.4121 0.9497
## raceWhite 0.7219 1.3852 0.5610 0.9290
## x6th_stageIIB 1.4432 0.6929 1.0837 1.9221
## x6th_stageIIIA 2.0161 0.4960 1.5051 2.7007
## x6th_stageIIIB 3.7556 0.2663 2.2681 6.2186
## x6th_stageIIIC 4.4221 0.2261 3.2357 6.0433
## grade 1.4404 0.6943 1.2655 1.6395
## log2(tumor_size + 1) 1.0956 0.9127 0.9889 1.2139
## log2(regional_node_examined + 1) 0.8449 1.1835 0.7643 0.9341
## Married 0.8750 1.1428 0.7412 1.0330
##
## Concordance= 0.672 (se = 0.012 )
## Likelihood ratio test= 242.2 on 11 df, p=<2e-16
## Wald test = 247.1 on 11 df, p=<2e-16
## Score (logrank) test = 269.2 on 11 df, p=<2e-16
Add log transformation to the tumor size and regional node_examined, now they are being statistically significant
cox_model4 <- coxph(Surv(survival_months, status) ~ age + race + x6th_stage + grade +
log2(tumor_size+1) + log2(regional_node_examined+1) + Married + Married*log2(regional_node_examined+1),
data = final)
summary(cox_model4)
## Call:
## coxph(formula = Surv(survival_months, status) ~ age + race +
## x6th_stage + grade + log2(tumor_size + 1) + log2(regional_node_examined +
## 1) + Married + Married * log2(regional_node_examined + 1),
## data = final)
##
## n= 1232, number of events= 616
##
## coef exp(coef) se(coef) z
## age 0.018072 1.018237 0.004598 3.931
## raceOther -0.482448 0.617270 0.213173 -2.263
## raceWhite -0.337154 0.713799 0.128945 -2.615
## x6th_stageIIB 0.363303 1.438072 0.146187 2.485
## x6th_stageIIIA 0.700234 2.014224 0.149208 4.693
## x6th_stageIIIB 1.355960 3.880486 0.258443 5.247
## x6th_stageIIIC 1.488566 4.430738 0.159240 9.348
## grade 0.367548 1.444189 0.066096 5.561
## log2(tumor_size + 1) 0.091715 1.096053 0.052194 1.757
## log2(regional_node_examined + 1) -0.254087 0.775624 0.073140 -3.474
## Married -0.687701 0.502731 0.356035 -1.932
## log2(regional_node_examined + 1):Married 0.147764 1.159240 0.092403 1.599
## Pr(>|z|)
## age 8.47e-05 ***
## raceOther 0.023625 *
## raceWhite 0.008930 **
## x6th_stageIIB 0.012948 *
## x6th_stageIIIA 2.69e-06 ***
## x6th_stageIIIB 1.55e-07 ***
## x6th_stageIIIC < 2e-16 ***
## grade 2.68e-08 ***
## log2(tumor_size + 1) 0.078884 .
## log2(regional_node_examined + 1) 0.000513 ***
## Married 0.053414 .
## log2(regional_node_examined + 1):Married 0.109791
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95
## age 1.0182 0.9821 1.0091
## raceOther 0.6173 1.6200 0.4065
## raceWhite 0.7138 1.4010 0.5544
## x6th_stageIIB 1.4381 0.6954 1.0798
## x6th_stageIIIA 2.0142 0.4965 1.5035
## x6th_stageIIIB 3.8805 0.2577 2.3383
## x6th_stageIIIC 4.4307 0.2257 3.2429
## grade 1.4442 0.6924 1.2687
## log2(tumor_size + 1) 1.0961 0.9124 0.9895
## log2(regional_node_examined + 1) 0.7756 1.2893 0.6720
## Married 0.5027 1.9891 0.2502
## log2(regional_node_examined + 1):Married 1.1592 0.8626 0.9672
## upper .95
## age 1.0275
## raceOther 0.9374
## raceWhite 0.9190
## x6th_stageIIB 1.9152
## x6th_stageIIIA 2.6984
## x6th_stageIIIB 6.4398
## x6th_stageIIIC 6.0537
## grade 1.6439
## log2(tumor_size + 1) 1.2141
## log2(regional_node_examined + 1) 0.8952
## Married 1.0102
## log2(regional_node_examined + 1):Married 1.3894
##
## Concordance= 0.673 (se = 0.012 )
## Likelihood ratio test= 244.8 on 12 df, p=<2e-16
## Wald test = 248.3 on 12 df, p=<2e-16
## Score (logrank) test = 271.1 on 12 df, p=<2e-16
cox_model5 <- coxph(Surv(survival_months, status) ~ age + race + x6th_stage + grade +
log2(tumor_size+1) + log2(regional_node_examined+1),
data = final)
summary(cox_model5)
## Call:
## coxph(formula = Surv(survival_months, status) ~ age + race +
## x6th_stage + grade + log2(tumor_size + 1) + log2(regional_node_examined +
## 1), data = final)
##
## n= 1232, number of events= 616
##
## coef exp(coef) se(coef) z Pr(>|z|)
## age 0.018557 1.018731 0.004578 4.054 5.04e-05
## raceOther -0.516231 0.596765 0.210954 -2.447 0.014400
## raceWhite -0.368703 0.691631 0.125884 -2.929 0.003401
## x6th_stageIIB 0.353730 1.424370 0.146028 2.422 0.015420
## x6th_stageIIIA 0.685477 1.984718 0.149028 4.600 4.23e-06
## x6th_stageIIIB 1.292146 3.640592 0.256483 5.038 4.71e-07
## x6th_stageIIIC 1.483916 4.410184 0.159225 9.320 < 2e-16
## grade 0.368485 1.445543 0.066109 5.574 2.49e-08
## log2(tumor_size + 1) 0.097260 1.102147 0.052088 1.867 0.061869
## log2(regional_node_examined + 1) -0.168850 0.844636 0.051210 -3.297 0.000977
##
## age ***
## raceOther *
## raceWhite **
## x6th_stageIIB *
## x6th_stageIIIA ***
## x6th_stageIIIB ***
## x6th_stageIIIC ***
## grade ***
## log2(tumor_size + 1) .
## log2(regional_node_examined + 1) ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## age 1.0187 0.9816 1.0096 1.0279
## raceOther 0.5968 1.6757 0.3947 0.9023
## raceWhite 0.6916 1.4459 0.5404 0.8852
## x6th_stageIIB 1.4244 0.7021 1.0699 1.8964
## x6th_stageIIIA 1.9847 0.5038 1.4820 2.6580
## x6th_stageIIIB 3.6406 0.2747 2.2022 6.0185
## x6th_stageIIIC 4.4102 0.2267 3.2279 6.0254
## grade 1.4455 0.6918 1.2699 1.6455
## log2(tumor_size + 1) 1.1021 0.9073 0.9952 1.2206
## log2(regional_node_examined + 1) 0.8446 1.1839 0.7640 0.9338
##
## Concordance= 0.669 (se = 0.012 )
## Likelihood ratio test= 239.8 on 10 df, p=<2e-16
## Wald test = 243.7 on 10 df, p=<2e-16
## Score (logrank) test = 266 on 10 df, p=<2e-16
cox_model6 <- coxph(Surv(survival_months, status) ~ age + race + x6th_stage + grade +
log2(regional_node_examined+1),
data = final)
summary(cox_model6)
## Call:
## coxph(formula = Surv(survival_months, status) ~ age + race +
## x6th_stage + grade + log2(regional_node_examined + 1), data = final)
##
## n= 1232, number of events= 616
##
## coef exp(coef) se(coef) z Pr(>|z|)
## age 0.017705 1.017863 0.004567 3.877 0.000106
## raceOther -0.513619 0.598326 0.210909 -2.435 0.014881
## raceWhite -0.367476 0.692480 0.125910 -2.919 0.003517
## x6th_stageIIB 0.461184 1.585950 0.134452 3.430 0.000603
## x6th_stageIIIA 0.821688 2.274336 0.129512 6.345 2.23e-10
## x6th_stageIIIB 1.413804 4.111567 0.247599 5.710 1.13e-08
## x6th_stageIIIC 1.622178 5.064110 0.140484 11.547 < 2e-16
## grade 0.374943 1.454908 0.066050 5.677 1.37e-08
## log2(regional_node_examined + 1) -0.173489 0.840726 0.051254 -3.385 0.000712
##
## age ***
## raceOther *
## raceWhite **
## x6th_stageIIB ***
## x6th_stageIIIA ***
## x6th_stageIIIB ***
## x6th_stageIIIC ***
## grade ***
## log2(regional_node_examined + 1) ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## age 1.0179 0.9825 1.0088 1.0270
## raceOther 0.5983 1.6713 0.3957 0.9046
## raceWhite 0.6925 1.4441 0.5410 0.8863
## x6th_stageIIB 1.5859 0.6305 1.2186 2.0641
## x6th_stageIIIA 2.2743 0.4397 1.7645 2.9315
## x6th_stageIIIB 4.1116 0.2432 2.5308 6.6798
## x6th_stageIIIC 5.0641 0.1975 3.8452 6.6693
## grade 1.4549 0.6873 1.2782 1.6560
## log2(regional_node_examined + 1) 0.8407 1.1894 0.7604 0.9296
##
## Concordance= 0.668 (se = 0.012 )
## Likelihood ratio test= 236.2 on 9 df, p=<2e-16
## Wald test = 238.6 on 9 df, p=<2e-16
## Score (logrank) test = 261 on 9 df, p=<2e-16
Model Evaluation
summary(final$survival_months)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 2.00 43.00 62.00 61.24 82.00 107.00
library(timeROC)
cox_models <- list(cox_model1, cox_model2, cox_model3, cox_model4, cox_model5, cox_model6)
time_points <- c(2,43,62,82,107)
dynamic_auc_results <- list()
for (i in 1:length(cox_models)) {
current_model <- cox_models[[i]]
time_roc <- timeROC(T = final$survival_months,
delta = final$status,
marker = predict(current_model, type = "lp"),
cause = 1, # Event of interest
times = time_points, # Time points
iid = TRUE)
dynamic_auc_results[[paste0("Model_", i)]] <- time_roc$AUC
print(paste("AUC for Model", i, "at time points:", paste(time_points, collapse = ", ")))
print(round(time_roc$AUC, 3))
}
## [1] "AUC for Model 1 at time points: 2, 43, 62, 82, 107"
## t=2 t=43 t=62 t=82 t=107
## NA 0.746 0.721 0.724 NA
## [1] "AUC for Model 2 at time points: 2, 43, 62, 82, 107"
## t=2 t=43 t=62 t=82 t=107
## NA 0.700 0.697 0.721 NA
## [1] "AUC for Model 3 at time points: 2, 43, 62, 82, 107"
## t=2 t=43 t=62 t=82 t=107
## NA 0.702 0.700 0.723 NA
## [1] "AUC for Model 4 at time points: 2, 43, 62, 82, 107"
## t=2 t=43 t=62 t=82 t=107
## NA 0.703 0.702 0.724 NA
## [1] "AUC for Model 5 at time points: 2, 43, 62, 82, 107"
## t=2 t=43 t=62 t=82 t=107
## NA 0.698 0.700 0.722 NA
## [1] "AUC for Model 6 at time points: 2, 43, 62, 82, 107"
## t=2 t=43 t=62 t=82 t=107
## NA 0.696 0.696 0.721 NA
model 4 tends to have a relatively higher AUC throughout different time points
C-index
cox_model1$concordance
## concordant discordant tied.x tied.y tied.xy concordance
## 3.518110e+05 1.553440e+05 0.000000e+00 2.219000e+03 0.000000e+00 6.936952e-01
## std
## 1.132421e-02
cox_model2$concordance
## concordant discordant tied.x tied.y tied.xy concordance
## 3.394840e+05 1.676700e+05 1.000000e+00 2.219000e+03 0.000000e+00 6.693900e-01
## std
## 1.156667e-02
cox_model3$concordance
## concordant discordant tied.x tied.y tied.xy concordance
## 3.405910e+05 1.665630e+05 1.000000e+00 2.219000e+03 0.000000e+00 6.715728e-01
## std
## 1.155125e-02
cox_model4$concordance
## concordant discordant tied.x tied.y tied.xy concordance
## 3.41080e+05 1.66074e+05 1.00000e+00 2.21900e+03 0.00000e+00 6.72537e-01
## std
## 1.15843e-02
cox_model5$concordance
## concordant discordant tied.x tied.y tied.xy concordance
## 3.394580e+05 1.676950e+05 2.000000e+00 2.219000e+03 0.000000e+00 6.693397e-01
## std
## 1.161257e-02
cox_model6$concordance
## concordant discordant tied.x tied.y tied.xy concordance
## 3.387630e+05 1.683460e+05 4.600000e+01 2.219000e+03 0.000000e+00 6.680127e-01
## std
## 1.158565e-02
Concordance: model1 69.4 model2 66.9 model3 67.2 model4 67.3 (overall account for meeting the model assumption and the multicollinearity, model 4 have the highest concordance score, although accompanied with “married” variable that is not statistically significant) model5 66.9 model6 66.8
dev_residuals <- residuals(cox_model4, type = "deviance")
plot(dev_residuals, main = "Deviance Residuals", ylab = "Residuals", xlab = "Index")
abline(h = c(-2, 2), col = "red", lty = 2)
surv_fit <- survfit(Surv(survival_months, status) ~ 1, data = final)
plot(surv_fit, xlab = "Time (months)", ylab = "Survival Probability",
main = "Survival Curve for the Final Model", col = "blue", lwd = 2)
grid()